"""
猫眼电影top100抓取
"""
import requests
import re
import time
import random

class MaoYanSpider:
    def __init__(self):
        self.url = 'https://maoyan.com/board/4?offset={}'
        self.headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.21a2 Safari/537.36'}

    def get_html(self, url):
        """请求"""
        html = requests.get(url=url, headers=self.headers).text
        # 调用解析函数
        self.parse_html(html)

    def parse_html(self, html):
        """正则解析提取数据"""
        regex = '<div class="movie-item-info">.*?title="(.*?)".*?<p class="star">(.*?)</p>.*?<p class="releasetime">(.*?)</p>'
        # [('名称','主演','时间'),(),...]
        r_list = re.findall(regex, html, re.S)
        for r in r_list:
            print(r)

    def crawl(self):
        for offset in range(0, 91, 10):
            page_url = self.url.format(offset)
            self.get_html(url=page_url)
            # 控制频率
            time.sleep(random.randint(1, 3))

if __name__ == '__main__':
    spider = MaoYanSpider()
    spider.crawl()